import sys
from BasicLibraries import *
import auxPlot
import os,sys
import _B_get_feature_importance as gfi
from matplotlib.lines import Line2D
from itertools import combinations
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

optimization_method = 'GA'


AP = auxPlot.PlotMsc()

#=== Get year range
up_to = 2021
Z = pd.read_csv('0_5_LandscapePopulation_0724_full.txt')
max_year = up_to+1
min_year =max_year-4
    
#==== Choose your population
population_type_ = 'LandscapePopulation_0724_full.txt'
feature_type_ = 'opt_'+optimization_method+'_feature_importance_0724.pckl'
process_type_ = 'opt_'+optimization_method+'_process_data_0724.pckl'.replace('.pckl', '_'+str(up_to)+'.pckl')
stat_type_ = 'opt_'+optimization_method+'_deviation_stat_0724.pckl'.replace('.pckl', '_'+str(up_to)+'.pckl')
tmp_outputs_ = 'tmp_outputs/opt_'+optimization_method+'_forest_'
#====
nomenclature_dict = {'r&d investments': 'Innovation capacity',
                    'new products': 'Innovation capacity',
                    'association': 'Innovation capacity',
                    'organizational structuring': 'Innovation capacity',\
                    'training': 'Risk mitigation',
                    'adoption of standards and rules': 'Risk mitigation',
                    'assessment and measurement': 'Risk mitigation',
                    'modification of procedures': 'Risk mitigation',
                    'asset modification': 'Risk mitigation'}

ONEDRIVE = ''
FIGURES = os.getcwd()+'/Figures/'
SAVEPLOT_ = False


#===

ACTIONS = [  'association',
             'adoption of standards and rules',
             'assessment and measurement',
             'organizational structuring',
             'modification of procedures',
             'asset modification',
             'training',
             'r&d investments',
             'new products',
     ]
sdgs = ['water & energy', 'cons. & prod.', 'biodiversity']


population_files = [prefix+'_'+population_type_ for prefix in ['0_0', '0_25', '0_5', '0_75', '1_0']]
process_files = [prefix+'_'+process_type_ for prefix in ['0_0', '0_25', '0_5', '0_75', '1_0']]
feature_files = [prefix+'_'+feature_type_ for prefix in ['0_0', '0_25', '0_5', '0_75', '1_0']]
stat_files = [prefix+'_'+stat_type_ for prefix in ['0_0', '0_25', '0_5', '0_75', '1_0']]
tmp_outputs = list(np.array(np.ravel([[tmp_outputs_+prefix+'_'+str(Y)+'.pckl' for prefix in ['0_0', '0_25', '0_5', '0_75', '1_0']] for Y in range(min_year, max_year)])))


'''
This file creates data to generate figures and the function to plot them
The figures are stored in the folder fig_data/
'''

#%%
class GenerateDataForFigures():
    def __init__(self):
        pass
    def stdErr(self, X): return(X.dropna().std()/np.sqrt(len(X.dropna())))
    def stdErrBootstrapped(self, X,N=5000, func='np.median'):
        n  =  int(len(X)*0.85)
        r = []
        for i in range(N):
            tmp = np.random.choice(np.ravel(X.values).tolist(), n, replace = True)
            r.append(eval(func)(tmp))
        #=== The bootstrapped standard error of the median is simply the standard deviation of the boostrapped statistics
        stdError = np.std(r)
        return(stdError)
    ##############################################################################
    def plot_out_of_sample_test(self, postfix='_standard', ax=None, panelLabel=True):
        rf, gp, lm = pickle.load(open('binarization_robustness_prediction'+postfix+'.pckl', 'rb'))
        
        z = rf
        z = z[z.Q == 0.75]
        z['W'] = z['W'].replace([1, 2, 3, 4], [0.25,0.5,0.75,1])
        if ax == None: ax = plt.subplot()
        sns.boxplot(x = 'W', y = 'correlation', data = z, palette='Blues', showfliers=False, ax  = ax)
        plt.axhline(y=0, ls = '-.', lw = 0.5, c = 'k')
        plt.xlabel('Weight to financial performance')
        plt.ylabel(r'Out-of-sample correlation, $\rho$')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        if panelLabel:
            ax.text(-0., 1.15, 'a)', transform=ax.transAxes,
                    fontsize=24, fontweight='bold', va='top', ha='right')
    def plot_out_of_sample_test_lmgp_comparison(self, postfix='_standard'):
        rf, gp, lm = pickle.load(open('binarization_robustness_prediction'+postfix+'.pckl', 'rb'))

        plt.figure(figsize = (22,6))
        ax = plt.subplot(121)
        s = rf.correlation
        l = lm.correlation 
        ratio1 = pd.DataFrame((s-l)) 
        ratio1 = ratio1[ratio1.abs() < ratio1.abs().quantile(0.99)]
        ratio1['ratio'] = ['Random Forest versus \n Linear Model']*len(ratio1)
        s = rf.correlation 
        l = gp.correlation 
        ratio2 = pd.DataFrame((s-l)) 
        ratio2 = ratio2[ratio2.abs() < ratio2.abs().quantile(0.99)]
        ratio2['ratio'] = ['Random Forest versus \n Gaussian Process']*len(ratio2)
        
        r = pd.concat((ratio1, ratio2)).dropna()
        sns.boxplot(x = 'ratio', y = 'correlation', data = r, ax = ax, palette = ['teal', 'brown'], \
                    showmeans = True, showfliers=False, meanprops={"marker":"o",
                       "markerfacecolor":"red", 
                       "markeredgecolor":"black",
                       "markersize":"10"})
        plt.axhline(y = 0, ls = '-.', c = 'k')
        plt.ylabel(r'Relative performance, $\rho_{RF} - \rho_x$')
        plt.xlabel('Model comparison')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax = plt.subplot(122)
        s = rf.rmse
        l = lm.rmse 
        ratio1 = pd.DataFrame((s-l)) 
        ratio1 = ratio1[ratio1.abs() < ratio1.abs().quantile(0.99)]
        ratio1['ratio'] = ['Random Forest versus \n Linear Model']*len(ratio1)
        s = rf.rmse 
        l = gp.rmse 
        ratio2 = pd.DataFrame((s-l)) 
        ratio2 = ratio2[ratio2.abs() < ratio2.abs().quantile(0.99)]
        ratio2['ratio'] = ['Random Forest versus \n Gaussian Process']*len(ratio2)
        
        r = pd.concat((ratio1, ratio2)).dropna()
        sns.boxplot(x = 'ratio', y = 'rmse', data = r, ax = ax, palette = ['teal', 'brown'], \
                    showmeans = True, showfliers=False, meanprops={"marker":"o",
                       "markerfacecolor":"red", 
                       "markeredgecolor":"black",
                       "markersize":"10"})
        plt.axhline(y = 0, ls = '-.', c = 'k')
        plt.ylabel(r'Relative performance, RMSE$_{RF} - $RMSE$_x$')
        plt.xlabel('Model comparison')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.tight_layout()
        
        plt.savefig('Figures/Figure_oos_comparison.pdf', bbox_inches = 'tight', dpi = 300)
        
    #================================================================
    #=== Random tests
    #================================================================

    def plot_random_placebo_tests(self, bnr = '0_75', random_test_name = 'Random test', allocation_type = 'allocation_1', 
                                  absolute_values=False, ax = None,  panelLabel=True, panelLabelText='b)'):
        '''
        Main text figure
        '''
        if ax == None: ax = plt.subplot()
        rnd_test, pla_test = pickle.load(open('random_test_'+bnr+'.pckl', 'rb'))
        rnd_test = rnd_test.rename(columns = {allocation_type: 'allocation'})
        #==
        c1 = rnd_test[['Weight', 'permutation', 'allocation']]
        if absolute_values:
            c1 = c1.abs()
        c1.columns = ['Weight', 'Permutation test', random_test_name]
        c1['Weight']  =  c1['Weight'].astype(str)
        c1 = c1.set_index('Weight').unstack().reset_index()
        if len(pla_test) > 0:
            c2 = pla_test[['Weight', 'p1', 'p2']].rename(columns = {'p1': 'Closest placebo', 'p2': 'Furthest placebo'}).set_index('Weight').unstack().reset_index()
            c1 = pd.concat((c1, c2))
            
        #==
        sns.pointplot(x = 'Weight', y = 0, hue = 'level_0', estimator = np.median, data = c1, dodge=0.3, 
                      palette = ['#006991', 'red', '#43163d', '#f58220', 'k'], 
                      linestyles='', capsize = 0.1, errorbar = ('ci', 95), ax = ax); 
        plt.axhline(y=0,c= 'k', ls= '-.')
        plt.legend(loc = 'center', bbox_to_anchor = (0.5,1.1),ncol=2)
        plt.ylabel('Excess performance over \nrandom allocations, $\mathbb{E}[\mathcal{P}_o] - \mathbb{E}[\mathcal{P}_p]$')
        plt.xlabel('Weight to financial performance')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        #ax.xaxis.set_ticks(np.linspace(0,1,5))
        if panelLabel:
            if absolute_values:
                panelLabelText = ''
            ax.text(-0., 1.15, panelLabelText, transform=ax.transAxes,
              fontsize=24, fontweight='bold', va='top', ha='right')
    
    def plot_random_placebo_distances(self):
        rnd_test, pla_test = pickle.load(open('random_test_'+bnr+'.pckl', 'rb'))
        a = rnd_test.copy()
        b = pla_test.copy()
        x0 = a[['Weight', 'permutation_distance', 'allocation_distance']].set_index('Weight').unstack().reset_index()
        x0['level_0'] = x0['level_0'].replace(['permutation_distance', 'allocation_distance'], ['Permutation test', 'Random test'])
        x1 = b[['Weight', 'p1dist', 'p2dist']].set_index('Weight').unstack().reset_index()
        x1['level_0'] = x1['level_0'].replace(['p1dist', 'p2dist'], ['Closest placebo', 'Furthest placebo'])
        x = pd.concat((x0,x1))
        x[0] = 1.-x[0]/27
        sns.boxplot(data = x.reset_index(drop=True), y=0, hue = 'level_0', x = 'Weight', palette = ['#006991', 'red', '#43163d', '#f58220', 'k'], showfliers = False)
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5),  title = '')
        plt.ylabel('Proportion of \noverlapping allocations')
        plt.xlabel('Weight to financial performance')
        plt.savefig('Figures/Figure_tests_differences.pdf', bbox_inches = 'tight', dpi = 300)
    
    def random_placebo_threshold_robustness(self):
        plt.figure(figsize = (32,8))
        outer_count=0
        count=0
        marker_type = ['o', 'v', '*']
        percentiles_ = ['50%', '85%', '95%']
        for bnr in ['0_5', '0_85', '0_95']:
            threshold_text = str(int(float(bnr.replace('_', '.'))*100))+'%'
            ax = plt.subplot(int(str(13)+str(outer_count+1)))
            
            rnd_test, pla_test = pickle.load(open('random_test_'+bnr+'.pckl', 'rb'))
            rnd_test = rnd_test.rename(columns = {'allocation_1': 'allocation'})

            a = rnd_test.copy()
            b = pla_test.copy()
            #==
            c1 = rnd_test[['Weight', 'permutation', 'allocation']]
            c1.columns = ['Weight', 'Permutation test', 'Random test']
            c1 = c1.set_index('Weight').unstack().reset_index()
            if len(pla_test) > 0:
                c2 = pla_test[['Weight', 'p1', 'p2']].rename(columns = {'p1': 'Closest placebo', 'p2': 'Furthest placebo'}).set_index('Weight').unstack().reset_index()
                c1 = pd.concat((c1, c2))
            count+=1
                
            #==
            sns.pointplot(x = 'Weight', y = 0, hue = 'level_0', estimator = np.median, data = c1, dodge=0.3, 
                          palette = ['#006991', 'red', '#43163d', '#f58220', 'k'], 
                          linestyles='', capsize = 0.1, errorbar = ('ci', 95), ax = ax); 
            plt.axhline(y=0,c= 'k', ls= '-.')
            if bnr == '0_85':
                plt.legend(loc = 'center', title = '', bbox_to_anchor = (0.5,1.1),ncol=2)
            else:
                ax.get_legend().set_visible(False)
            if bnr == '0_5':
                plt.ylabel('Excess performance over \nrandom allocations, $\mathbb{E}[\mathcal{P}_o] - \mathbb{E}[\mathcal{P}_p]$')
            else:
                plt.ylabel('')
            plt.xlabel('Weight to financial performance')
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.text(-0., 1.15, percentiles_[outer_count], transform=ax.transAxes,
              fontsize=24, fontweight='bold', va='top', ha='right')
            outer_count+=1
        plt.tight_layout()

        plt.savefig('Figures/Figure_quantile_robustness_SI.pdf', bbox_inches = 'tight', dpi = 300)
    
    def make_Figure3(self, bnr='0_75',  marker_type='o'):
        plt.figure(figsize = (12, 6))
        ax2 = plt.subplot()
        self.plot_random_placebo_tests( ax = ax2, panelLabel=False)
        plt.tight_layout()
        plt.savefig('Figures/Figure3.pdf', bbox_inches = 'tight', dpi = 300)

    def make_Figure3OOSSI(self, bnr='0_75',  marker_type='o'):
        plt.figure(figsize = (14, 6))
        ax2 = plt.subplot()
        self.plot_out_of_sample_test( ax = ax2, panelLabel=False)
        plt.tight_layout()
        plt.savefig('Figures/Figure3OOSSI.pdf', bbox_inches = 'tight', dpi = 300)
                
    def validity_further_robustness(self):
        rnd_test, pla_test = pickle.load(open('random_test_0_75.pckl', 'rb'))
        rnd_test = rnd_test.rename(columns = {'allocation_1': 'allocation'})

        
        resp, resa, resb1, resb2, resb3 = [], [], [], [], []
        for N in [0,0.25,0.5,0.75,1]:
            cp = rnd_test[rnd_test.Weight == N].permutation
            ca = rnd_test[rnd_test.Weight == N].allocation

            for _ in range(500):
                b = pd.DataFrame(np.random.standard_t(2, 10000)/22)
                b = b[b.abs() < b.quantile(0.99)].dropna().values.ravel().tolist()
                ap = median_test(cp.dropna(),b)[1]
                aa = median_test(ca,b)[1]

                resp.append([N, ap, 'Random permutation'])
                resa.append([N, aa, 'Random allocation'])

        resp = pd.DataFrame(resp, columns = ['Weight', 'p-value', 'Test-type'])
        resa = pd.DataFrame(resa, columns = ['Weight', 'p-value', 'Test-type'])
        resb1 = pd.DataFrame(resb1, columns = ['Weight', 'p-value', 'Test-type'])
        resb2 = pd.DataFrame(resb2, columns = ['Weight', 'p-value', 'Test-type'])
        res = pd.concat((resp, resa, resb1, resb2))
        res = res[res['Test-type'] != 'Random placebo']
        plt.figure()
        ax = plt.subplot()
        sns.boxplot(x = 'Weight', y = 'p-value', hue = 'Test-type', data = res, palette = ['#006991', 'red', '#43163d', '#f58220'], showfliers = False, ax = ax)
        plt.axhline(y = 0.05, c = 'r', lw = 0.5, ls = '-.')
        plt.axhline(y = 0.1, c = 'r', lw = 0.5, ls = '--')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.xlabel('Weight to financial performance')
        plt.savefig('Figures/Figure_validitySI_pvalues.pdf', bbox_inches = 'tight', dpi = 300)
    def random_test_in_time(self, bnr = '0_75', random_test_name = 'Random test', allocation_type = 'allocation_1', 
                                  absolute_values=False, ax = None,  panelLabel=True, panelLabelText='b)'):
        plt.figure(figsize=(24,8))
        cnt = 1
        titles = ['Permutation test', 'Random test']
        for t in ['permutation', 'allocation']:
            ax = plt.subplot(int(str(12)+str(cnt)))
            rnd_test, pla_test = pickle.load(open('random_test_'+bnr+'.pckl', 'rb'))
            rnd_test = rnd_test.rename(columns = {allocation_type: 'allocation'})
            #==
            c1 = rnd_test[['Weight', t, 'rfyear']]
            if absolute_values:
                c1 = c1.abs()
            c1.columns = ['Weight', 'Permutation test', 'year']
            c1 = c1.sort_values(by = 'year')
            col = ['#006991', 'red']
            ls = ['-', ':', '-.']
            mk = ['o', '*', 'v']
            count=0
            for Weight in [0,0.5,1]:
                c1L = c1[c1.Weight == Weight]
                #==
                sns.pointplot(x = 'year', y = 'Permutation test',  estimator = np.median, data = c1L, dodge=0.3, 
                          color = col[cnt-1], 
                          markers = mk[count],
                          linestyles=ls[count], 
                          scale = 2,
                          capsize = 0.1, errorbar = ('ci', 95), ax = ax); 
                count+=1
            plt.title(titles[cnt-1]) 
            plt.axhline(y=0, c='k', lw=2)
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            plt.ylim([-0.02,0.025])
            if t == 'permutation':
                plt.ylabel('Excess performance over \nrandom allocations, $\mathbb{E}[\mathcal{P}_o] - \mathbb{E}[\mathcal{P}_p]$')
            else:
                plt.ylabel('')
            plt.xlabel('Weight to financial performance')
            # access legend objects automatically created from data
            if t != 'permutation':
                handles, labels = plt.gca().get_legend_handles_labels()       
                # create manual symbols for legend
                line1 = Line2D([0], [0], label=r'$\mathcal{W} = 0$', ls='-', marker = 'o',color='k')
                line2 = Line2D([0], [0], label=r'$\mathcal{W} = 0.5$', ls=':', marker = '*',color='k')
                line3 = Line2D([0], [0], label=r'$\mathcal{W} = 1$', ls='-.', marker = 'v', color='k')
                # add manual symbols to auto legend
                handles.extend([line1, line2, line3])
                plt.legend(handles=handles, loc = 'center left', bbox_to_anchor = (1,0.5))
            cnt+=1
        plt.tight_layout()

        
    #===========================================#
    #====== Analysis of surface rugedness ======#
    #===========================================#
    def plot_ruggedness(self, save_=True):
        
        e,f  = pickle.load(open('epistasis_0_75.pckl', 'rb'))
        var_ = 'Gamma'
        
        d = e[e.Type == var_]
        m = d.pivot('Mutations', 'Weight', 'Statistics_mean')
        l = d.pivot('Mutations', 'Weight', 'Lower bound')
        u = d.pivot('Mutations', 'Weight', 'Upper bound')
        m.index/=2
        l.index/=2
        u.index/=2
        
        plt.figure(figsize = (18,6))
        ax = plt.subplot(121)
        count=0
        index_shift = [-0.25,-0.125,0,0.125,0.25]
        
        
        for n in m.columns:
            a, lb, ub = m[n], l[n], u[n]
            a.index+=index_shift[count]
            lb.index+=index_shift[count]
            ub.index+=index_shift[count]
        
            a.plot(marker = 'o', ms = 10, yerr = (a-lb, ub-a), capsize = 4,  ls = '')
            count+=1
        ax.set_xticks([1,2,4,8])
        plt.legend([r'$\mathcal{W} = '+str(n)+'$' for n in [0,0.25,0.5,0.75,1]], loc = 'center', bbox_to_anchor = (.68,0.7))
        plt.axhline(y = 0, c  = 'k', ls = '-.', lw = 1)
        plt.xlabel('Mutations')
        plt.ylabel(r'$1-\gamma$')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0., 1.15, 'a)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        #===== Panel B
        rs = pickle.load(open('rugedness_test_rs.pckl', 'rb')).set_index('Weight')
        rs['Uncertainty'] = (rs['upper_bound'] - rs['lower_bound'])/2 
        
        ax = plt.subplot(122)
        rs['r/s'].plot(marker = 'o', ms=10, yerr = (rs['r/s']- rs['lower_bound'], rs['upper_bound']-rs['r/s']), capsize = 4, c = '#007b7b')
        
        plt.axhline(y = 11.8, c  = 'k', ls = '-.', lw = 1)        
        
        plt.xlabel('Weight to financial performance')
        plt.ylabel('r/s')
        plt.ylim(0,12.2)
        ax.xaxis.set_ticks(np.linspace(0,1,5))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0., 1.15, 'b)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        if save_:
            plt.savefig('Figures/Figure4.pdf', bbox_inches = 'tight', dpi = 300)

       
       
    #============================================#
    #====== Analysis of feature importance ======#
    #============================================#
    def feature_importance(self):
        importance_bar, importance_mat, importance_mat_s = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
        for N in [0,2,4]:
            feature_type =feature_files[N]
            avg_imp,  imp, imp_s, total_importance = gfi.plot_feature_importance(feature_type, min_year, max_year, save_plot=SAVEPLOT_)
            avg_imp = avg_imp.unstack().reset_index()
            avg_imp['Weight'] = [0 if N == 0 else 0.5 if N == 2 else 1][0]
            importance_bar = pd.concat((importance_bar, avg_imp))
            #====
            imp_tmp = imp.unstack().reset_index()
            imp_tmp['Weight'] = [0 if N == 0 else 0.5 if N == 2 else 1][0]
            importance_mat = pd.concat((importance_mat, imp_tmp))
            #====
            imp_tmp = imp_s.unstack().reset_index()
            imp_tmp['Weight'] = [0 if N == 0 else 0.5 if N == 2 else 1][0]
            importance_mat_s = pd.concat((importance_mat_s, imp_tmp))
        res = []
        for i in [0, 0.5, 1]:
            for y in range(min_year, max_year):
                tmp = importance_bar[(importance_bar.rfyear == y) & (importance_bar.Weight == i)]
                res.append([i, y, tmp[tmp.level_0 == 'Behavioural'][0].iloc[0]/tmp[tmp.level_0 == 'Financial'][0].iloc[0]])
        res = pd.DataFrame(res, columns = ['Weight', 'year', 'Relative importance'])
        
        dims_action = importance_mat[[0, 'ini', 'Weight']].groupby(['ini', 'Weight']).sum().reset_index()
        dims_sdgs = importance_mat[[0, 'sdg', 'Weight']].groupby(['sdg', 'Weight']).sum().reset_index()

        #======
        importance_mat['ini'] = importance_mat['ini'].replace(nomenclature_dict )
        #======        
        dims_action_grouped = importance_mat[[0, 'ini', 'Weight']].groupby(['ini', 'Weight']).sum().reset_index()

        return res, dims_action, dims_sdgs, dims_action_grouped
    def plot_feature_importance(self, res, dims_actions, dims_sdgs, dims_action_grouped, save_ = False):
        #=== 
        plt.figure()
        sns.barplot(x = 'year', y = 'Relative importance', hue = 'Weight', data = res, edgecolor = 'k', palette = 'Set2')
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5), ncol = 1, title = 'Weight')
        plt.ylabel('Relative importance of \nfinancial dimensions')
        #plt.ylim(5,10.1)
        plt.xlabel('')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.text(-0.5,0.215, 'a)')
        plt.figure(figsize = (16,18))
        ax = plt.subplot(311)
        dims_sdgs['sdg'] = dims_sdgs['sdg'].apply(lambda x: x.split('SDG ')[1])
        dims_sdgs = dims_sdgs.rename(columns = {'Weight': 'Weight to financial performance'})
        dims_sdgs[0]*=100
        sns.barplot(x = 'Weight to financial performance', y = 0, hue = 'sdg', data = dims_sdgs,  edgecolor = 'k', palette = 'Pastel1', ax = ax)
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5),  title = 'Environmental challenge')
        plt.ylabel('Importance level, %')
        plt.text(-0.5,7.6, 'a)')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax = plt.subplot(312)
        dims_actions[0]*=100
        dims_actions = dims_actions.rename(columns = {'Weight': 'Weight to financial performance'})
        sns.barplot(x = 'Weight to financial performance', y = 0, hue = 'ini', data = dims_actions, edgecolor = 'k', palette = 'Set3', alpha = 0.8, ax = ax)
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5),  title = 'Action type')
        plt.ylabel('Importance level')
        plt.tight_layout()
        plt.text(-0.5,3.7, 'b)')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax = plt.subplot(313)
        dims_action_grouped[0]*=100
        dims_action_grouped = dims_action_grouped.rename(columns = {'Weight': 'Weight to financial performance'})
        sns.barplot(x = 'Weight to financial performance', y = 0, hue = 'ini', data = dims_action_grouped, edgecolor = 'k', palette = 'Set2', alpha = 0.8, ax = ax)
        plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5),  title = 'Action type')
        plt.ylabel('Importance level')
        plt.tight_layout()
        plt.text(-0.5,9, 'c)')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        if save_:
            plt.savefig('Figures/Figure_FeatImp.pdf', bbox_inches = 'tight', dpi = 300)
        U = []
        for FW in [0.0,0.25,0.5,0.75,1.0]:
            print('Realisation:', FW)
            population_type = str(FW).replace('.', '_')+'_'+population_type_
            feature_type = str(FW).replace('.', '_')+'_'+feature_type_
            dt = pd.read_csv(population_type)
            gvkey_list = list(np.unique(dt.gvkey))
        
            _, _, _, _, _, feature_importance = pickle.load( open(feature_type, 'rb'))
            
            s = np.median([feature_importance[str(n)].unstack().reset_index()[0].sum() for n in range(2018,2020)])
            sd = np.median([feature_importance[str(n)].unstack().reset_index()[0].std() for n in range(2018,2020)])
            U.append([FW, s, sd])
            
        U = pd.DataFrame(U, columns = ['Weight', 'Importance', 'std']).set_index('Weight')
        plt.figure()
        U.Importance.plot(yerr = U['std'], capsize = 4, marker = 'o', c  ='navy', ms = 10)
        plt.xlabel('Weight to financial performance')
        plt.ylabel('Relative feature importance')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        if save_:
            plt.savefig('Figures/Figure_ImportanceWeight.pdf', bbox_inches = 'tight', dpi = 300)




    def plot_incidence_of_importanceSI(self, dpi_=60, save_=False):
        relevance_analysis = pd.DataFrame()
        for N in range(5):
            process_type = process_files[N]
            actions, env_sdgs, normal_fit, permutation_test_type, out0, res, prf, optima,  observed, rel_feat, irr_feat = pickle.load(open(process_type, 'rb'))
            for Y in range(min_year, max_year):
                R = rel_feat[str(Y)].drop(index = ['inv', 'firm_size', 'TangibleAssets', 'dividends', 'buyback_rate', 'MarketLeverage'])
                I = irr_feat[str(Y)]
                S = pd.concat((R, I))
                relevance_analysis = pd.concat((relevance_analysis, S), axis = 1)
        relevance_analysis = relevance_analysis.drop(index = ['Estimated', 'Reported'])
        relevance_analysis = relevance_analysis.sum(axis = 1)      
        relevance_analysis = relevance_analysis.reset_index()
        relevance_analysis['SDG'] = relevance_analysis['index'].apply(lambda x: x.split('SDG ')[1])
        relevance_analysis['Action'] = relevance_analysis['index'].apply(lambda x: x.split(' - ')[0])
        relevance_analysis = relevance_analysis.pivot('SDG', 'Action', 0)
        #==
        fs = (10,6)
        plt.figure(figsize = fs, dpi = dpi_)
        relevance_analysis/=relevance_analysis.max().max()
        ax = sns.heatmap(relevance_analysis.transpose(), annot = True, cbar =  False, fmt  = '.3g', 
                         vmin = 0,vmax=1,
                         center = 0.5, linewidths = 0.8, 
                         annot_kws={"color": "k", 'size': '24'},\
                         linecolor = 'black', alpha = 0.6,
                         cmap = 'RdYlBu')
        plt.xlabel('')
        plt.ylabel('')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        print(relevance_analysis.transpose().to_latex(escape = False))
        if save_:
            plt.savefig('Figures/Feature_importance_SI.pdf', bbox_inches = 'tight', dpi = 300)


    #==================================================#
    #====== Analysis of the optimisation process ======#
    #==================================================#

    def within_distance(self, X, name = 'Diversity of optima'):
        res = []
        Ns = [0.,0.25,0.5,0.75,1.]
        count=0
        for w in ['_0_0_', '_0_25_', '_0_5_', '_0_75_','_1_0_']:
            print(name, w)
            for y in range(min_year, max_year):
                cols = X[w+'-'+str(y)].columns
                possible_combinations = list(combinations(cols, 2))
                for i in possible_combinations:
                    if np.random.uniform(0,1) < 0.7:
                        try:
                            a = X[w+'-'+str(y)][i[0]]
                            b = X[w+'-'+str(y)][i[1]]    
                            dista_ = abs(a-b).sum().sum()
                            if type(dista_) == np.float64 or type(dista_) == np.int64:
                                res.append([Ns[count],  i, y, dista_])
                        except:
                            continue
            count+=1
        res = pd.DataFrame(res, columns = ['Weight to financial performance', 'gvkey', 'year',  name])
        return res
    def proportion_of_optimal_initiatives(self, Xopt, Xobs, ax):
        res = []
        Ns = [0.,0.25,0.5,0.75,1.]
        count=0
        for w in ['_0_0_', '_0_25_', '_0_5_', '_0_75_','_1_0_']:
            
            for y in range(min_year, max_year):
                A = Xopt[w+'-'+str(y)]
                B = Xobs[w+'-'+str(y)]
                res.append([str(Ns[count]), A.sum().mean()/B.sum().mean()])
            count+=1
        res = pd.DataFrame(res, columns = ['weight', 'Quasioptimal over observed'])
        m = res.groupby('weight').mean()
        s = res.groupby('weight').apply(self.stdErr)
        m.plot(yerr = s, capsize =4, marker = 'o', ms = 10, ls = '', legend = False, ax = ax)
        plt.ylabel('Ratio of average number of allocations in \nquasioptimal and observed behaviours ')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0., 1.15, 'a)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        return res
    def estimate_pvalue(self, X, l, r):
        a = X[X['Weight to financial performance'] == l]['Deviation from optimal behaviour']
        b = X[X['Weight to financial performance'] == r]['Deviation from optimal behaviour']
        return np.round(ttest_ind(a, b)[1], 3)
    def get_allocation_type(self, info, gvkey, year,  allocation_type):
        X = pd.DataFrame(info[allocation_type])
        X.columns = [str(gvkey)+'-'+str(year)]
        return X
    
    def solution_versus_weights(self):
        r = []
        best_sol = pd.DataFrame()
        obs_allocations_dict = dict()
        opt_allocations_dict = dict()
        for file_name in tmp_outputs:
            resultsDICTIONARY, _, _,\
                         _, _, _, _, _, _, _= pickle.load(open(file_name, 'rb'))
            year =  file_name.split('.')[0].split('_')[-1] 
            weight=  file_name.split('forest')[1].split(year)[0]
            reference_file = '_'.join(weight.split('_')[1:])+'opt_GA_process_data_0724.pckl'.replace('.pckl', '_'+str(up_to)+'.pckl')
            _, _, _, _, _, ref, _, _, _, _, _ = pickle.load(open(reference_file, 'rb'))
            ref = ref[ref.rfyear == int(year)]
            best_allocations,  obs_allocations,  opt_allocations = pd.DataFrame(),pd.DataFrame(),pd.DataFrame()
            for i in range(len(resultsDICTIONARY)):
                t = resultsDICTIONARY[i]
                if type(t) != int:
                    #=== You are only going to process it if the firm is in the final sample 
                    if int(list(resultsDICTIONARY[i]['firm_summary'].keys())[0]) in list(ref.gvkey):

                        gvkey = list(t['firm_summary'].keys())[0]
                        true_global_opt = t['true_global_optimum']
                        local_opt = t['local_optimum']
                        obs = t['observed_performance']
                        exp_ = t['expected_performance']
                        g = list(t['firm_summary'].keys())[0] 
                        best_obs = t['best_allocation_distance_humming']
                        clos_obs = t['closest_allocation_distance_humming']
                        if local_opt > exp_:
                            r.append([true_global_opt,local_opt, obs, exp_, best_obs, clos_obs, g, year, weight])                   
            
                            ##### Best and observed allocations
                            ##
                            tmp = self.get_allocation_type(t, gvkey, year, 'observed_allocation')
                            obs_allocations = pd.concat((obs_allocations, tmp), axis = 1)
                            ##
                            tmp = self.get_allocation_type(t, gvkey, year, 'closest_allocation')
                            opt_allocations = pd.concat((opt_allocations, tmp), axis = 1)



            obs_allocations_dict[weight+'-'+str(year)]  = obs_allocations
            opt_allocations_dict[weight+'-'+str(year)]  = opt_allocations
            
        
        r = pd.DataFrame(r, columns = ['global_opt', 'local_opt', 'observed_performance', 'Expected_performance', \
                                       'local_hum', 'closest_hum', 'gvkey', 'rfyear', 'weight'])
        r['weight'] = r['weight'].replace(['_0_0_', '_0_25_', '_0_5_', '_0_75_', '_1_0_'], [0.,0.25,0.5,0.75, 1])
        #=== Make sure that the search process didn't do strange things so that expected performance is smaller than the optima performance
        r = r[(r.global_opt > r.Expected_performance) & (r.local_opt > r.Expected_performance)]
        pickle.dump([r,  obs_allocations_dict,  opt_allocations_dict], open('fig_data/fig_data_global_opt_'+str(up_to)+'.pckl', 'wb'))

        #=== Diversity
        print('Estimating diversity - this might take a while')
        opt_opt = self.within_distance(opt_allocations_dict, 'Diversity of closest optima')
        obs_obs = self.within_distance(obs_allocations_dict, 'Diversity of empirical')
        pickle.dump([opt_opt, obs_obs], open('Within_distance_'+str(up_to)+'.pckl', 'wb'))




    def plot_optimal_data(self, HU, opt_opt, obs_obs, up_to,  save_=False):
        r,   obs_allocations_dict,  opt_allocations_dict = pickle.load(open('fig_data/fig_data_global_opt_'+str(up_to)+'.pckl', 'rb'))
        #r = r[r.local_opt > r.Expected_performance]
        aggregate_dim = 'Weights to financial performance'
        r_ = r.copy()
        r_ = r_.rename(columns = {'weight': 'Weights to financial performance',
                                 'Expected_performance': 'Empirical',
                                 'observed_performance': 'Observed'})
           
        r_ = r_.rename(columns = {'global_opt': 'Local optima',
                                  'local_opt': 'Quasioptimal solutions'})
        r_['performance_gap'] = r_.Empirical - r_['Quasioptimal solutions']
        cols_gaps = ['#03ad42', '#0091e4']
        plt.figure(figsize = (24,8))
        ax = plt.subplot(121)
        
        sns.pointplot(data = r_, x = 'Weights to financial performance', 
                      y  = 'performance_gap', 
                      estimator = np.median, 
                      ax = ax,
                      color = 'r',
                      ms=10,
                      linestyles='', capsize = 0.1, errorbar = ('ci', 95))
        plt.ylim(-0.082,0.003)
        plt.axhline(y=0,ls='--', c='k')
        plt.ylabel('Excess performance over quasioptimal solutions,\n $\mathbb{E}[\mathcal{P}_{empirical}] - \mathbb{E}[\mathcal{P}_{quasioptimal}]$')
        plt.xlabel('Weight to financial performance')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        #plt.text(-0.6,0.225, r'${\bf A}$')
        ax.text(-0., 1.15, 'a)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')

        
        #==
        ax = plt.subplot(122)
        opt_opt['type'] = ['Quasioptimal solution']*len(opt_opt)
        opt_opt = opt_opt.rename(columns = {'Diversity of closest optima': 'value'})
        average_observed  = obs_obs['Diversity of empirical'].median()
        
        z = opt_opt[['Weight to financial performance', 'value', 'type']]
        sns.boxplot(x = 'Weight to financial performance', y = 'value',
                    hue = 'type', data = z, 
                    palette =  cols_gaps[:1],
                    showfliers = False, 
                    ax = ax)
        #plt.legend([], [], frameon=False)
        plt.legend(loc = 'center', bbox_to_anchor = (0.5,1.1), ncol = 3, title = '')
        plt.axhline(y = average_observed, c =  cols_gaps[1], lw =2, ls = '--')
        plt.ylabel('Behavioural diversity \n (humming distance)')
        plt.text(2.5, -1.5, 'Empirical diversity')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.annotate("", xy=(3.5, 4.5), xytext=(3.5, 0),
            arrowprops=dict(arrowstyle="->"))

        plt.ylim(-5,20)
        ax.text(-0., 1.15, 'b)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        plt.tight_layout()
        if save_:
            plt.savefig('Figures/Figure5.pdf', bbox_inches = 'tight', dpi = 300)

        return r_[['Local optima', 'Quasioptimal solutions','Empirical', 'Weights to financial performance']]

    def plot_quasioptimal_performances(self, r):
        cols_gaps = ['#03ad42', '#0091e4']
        plt.figure()
        ax = plt.subplot()
        r,   obs_allocations_dict,  opt_allocations_dict = pickle.load(open('fig_data/fig_data_global_opt_'+str(up_to)+'.pckl', 'rb'))
        r = r[r.local_opt > r.Expected_performance]
        aggregate_dim = 'Weights to financial performance'
        r_ = r.rename(columns = {'weight': 'Weights to financial performance',
                                 'Expected_performance': 'Empirical',
                                 'observed_performance': 'Observed'})
           
        r_ = r_.rename(columns = {'global_opt': 'Local optima',
                                  'local_opt': 'Quasioptimal solutions'})
        A = r_[['Quasioptimal solutions','Empirical', 'Weights to financial performance']].set_index('Weights to financial performance').unstack().reset_index()
        sns.boxplot(x = 'Weights to financial performance', y = 0, 
                    hue = 'level_0', data = A, 
                    palette = cols_gaps,
                    #showfliers = False,
                    showmeans=True,
                    meanprops={"marker":"o","markerfacecolor":"r", "markeredgecolor":"r"})
        plt.legend(loc = 'center', bbox_to_anchor = (0.5,1.1), ncol = 3, title = '')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0., 1.15, 'a)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
        plt.ylabel('Expected performance')
        plt.axhline(y=0, ls='--', lw= 0.5, c='k')
        
        plt.savefig('Figures/Figure_empirical_performance_gapSI.pdf', bbox_inches = 'tight', dpi = 300)

    def plot_cumulative_performance(self):
        performance  =  pd.DataFrame()
        for N in range(4):
            process_type = process_files[N]
            actions, env_sdgs, normal_fit, permutation_test_type, out0, res, prf, optima,  observed, rel_feat, irr_feat = pickle.load(open(process_type, 'rb'))
            s = prf[['rfyear', 'Performance', 'OptimalPerformance']].groupby('rfyear').mean().cumsum()
            s.columns = [r'$\mathcal{P}_{observed}$', r'$\mathbb{E}[\mathcal{P}_{optimal}]$']
            s['W'] = [N]*len(s)
            performance = pd.concat((performance, s))
        plt.figure()
        ax = plt.subplot()
        colors = [['skyblue', 'lightcoral'], ['royalblue', 'orangered'], ['b', 'r'], ['darkblue', 'brown'] ]
        marker = ['o', 'v', '^', '*']
        for N in range(4):
            tmp = performance[performance.W== N]
            tmp.index = tmp.index.astype(str)
            tmp = tmp[[r'$\mathcal{P}_{observed}$', r'$\mathbb{E}[\mathcal{P}_{optimal}]$']].cumsum()
            if N == 0:
                tmp.plot(style = ['--', '-'], ax = ax, color = colors[N], marker = marker[N], ms = 10)
                plt.legend(loc = 'center', bbox_to_anchor = (0.5,1.05), ncol = 2)
            else:
                tmp.plot(style = ['--', '-'], ax = ax, color = colors[N], marker = marker[N], ms = 10, legend = False)
            plt.axhline(y = 0, ls = '-.', c= 'k', lw = 1)
            plt.xlabel('')
            plt.ylabel('Cumulative performance')
            plt.text(0, -0.25, r'$\bullet$ W=0', fontsize = 16)
            plt.text(0, -0.32, r'$\blacktriangledown $ W=0.25', fontsize = 16)
            plt.text(0, -0.40, r'$\blacktriangle$ W=0.5', fontsize = 16)
            plt.text(0, -0.47, r'$\bigstar$ W=1', fontsize = 16)
    def differentiation_performance_regression(self):
        r, obs_allocations_dict,  opt_allocations_dict = pickle.load(open('fig_data/fig_data_global_opt_'+str(up_to)+'.pckl', 'rb'))    
        r = r[r.global_opt > r.Expected_performance]
        rst = []
        weights = [0.,0.25,0.5,0.75,1.]
        r['mrg'] = r['gvkey'].astype(int).astype(str)+'-'+r['rfyear'].astype(int).astype(str)
        for N in range(5):
            process_type = process_files[N]
            population_type = population_files[N]
            dt  =pd.read_csv(population_type, low_memory= False)
            actions, env_sdgs, normal_fit, permutation_test_type, out0, res, prf, optima,  observed, rel_feat, irr_feat = pickle.load(open(process_type, 'rb'))
            metrics = ['closest_hum', 'distance_from_opt', 'score']
            res = res.merge(r[['closest_hum', 'local_hum', 'mrg']])
            T, model = AP.StatisticsPerformanceCorrelationRegression(res, dt, metrics)
            rst.append([weights[N]]+T.loc['distance measure'].values.tolist())
        rst = pd.DataFrame(rst, columns =['Weights', 'Humming distance',  'Cosine similarity', 'Optimality score'])
        return rst
    def behavioural_gap(self):
        weights = [0,0.25,0.5,0.75,1]
        sdg_challenge, activity_type, emp_mechanism = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
        actDIST, sdgDIST, mecDIST = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
        for N in range(5):
            stat_type =stat_files[N]
            average_deviation_b, sdg_dev_b, act_dev_b, mec_dev, act, chl, sdg_devDIST, act_devDIST, mec_devDIST = pickle.load(open(stat_type, 'rb'))
            #== Gap along the activity types
            activity_type = pd.concat((activity_type, act_dev_b.set_index('Action')['$\Delta($optimum, observed$)$']), axis = 1)
            act_typeDIST = act_devDIST.transpose().unstack().reset_index().drop(columns = ['level_1'])
            act_typeDIST['Weights'] = weights[N]
            actDIST = pd.concat((actDIST, act_typeDIST))

            #== Gap along the SDG challenges
            sdg_challenge = pd.concat((sdg_challenge, sdg_dev_b.set_index('SDG')['$\Delta($optimum, observed$)$']), axis = 1)
            sdg_challengeDIST = sdg_devDIST.transpose().unstack().reset_index().drop(columns = ['level_1'])
            sdg_challengeDIST['Weights'] = weights[N]
            sdgDIST = pd.concat((sdgDIST, sdg_challengeDIST))
            #== Gap along the empirical mechanisms
            emp_mechanism = pd.concat((emp_mechanism, mec_dev.set_index('Action')['$\Delta($optimum, observed$)$']), axis = 1)
            emp_mechanismDIST = mec_devDIST.transpose().unstack().reset_index().drop(columns = ['level_1'])
            emp_mechanismDIST['Weights'] = weights[N]
            mecDIST = pd.concat((mecDIST, emp_mechanismDIST))
        sdg_challenge.columns = [0,0.25,0.5,0.75,1]
        activity_type.columns = [0,0.25,0.5,0.75,1]
        emp_mechanism.columns = [0,0.25,0.5,0.75,1]
        return activity_type, sdg_challenge, emp_mechanism, actDIST, sdgDIST, mecDIST




    def plot_challenges_gap(self, X, ax):
        a = X.copy()
        b = pd.DataFrame()
        c = pd.DataFrame()
        for i in a.columns:
            b[i] = a[i].apply(lambda x: float(x.split('*')[0]))
            c[i] = a[i].apply(lambda x: len(x.split('*')[1:]))
        c = c.replace([0, 1, 2, 3], ['', '*', '**', '***'])

        z = b.transpose().unstack().reset_index()
        z.columns = ['Challenge', 'W', 'value']  
        z['Challenge'] = z['Challenge'].apply(lambda x: ' '.join(x.split('SDG ')[1:]).capitalize())
        z['Challenge'] = z['Challenge'].replace('', 'Biodiversity')
        c =c.transpose().unstack().reset_index()
        c.columns = ['Challenge', 'W', 'value']   
        c['Challenge'] = c['Challenge'].apply(lambda x: ' '.join(x.split('SDG ')[1:]).capitalize())
        c['Challenge'] = c['Challenge'].replace('', 'Biodiversity')

        barplot = sns.barplot(x = 'W', y = 'value', hue = 'Challenge', data = z, 
                              palette = ['#038c93', '#ff4f61', '#87b8ea'], ax = ax)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.legend(fontsize = 20, loc = 'center left', bbox_to_anchor = (1,0.5), ncol=1)
        count=0
        for p, sig in zip(barplot.patches,c['value']):
            if p.get_height() > 0:
                yp = p.get_height()
            else:
                yp = p.get_height()-2.5
            barplot.text(p.get_x() + p.get_width() / 2., yp, 
                         c['value'].iloc[count], ha='center', fontsize = 18)
            p.set_edgecolor('k')
               
            if c['value'].iloc[count] == '':
                p.set_color('gray')  
                p.set_edgecolor('gray')
            count+=1

        plt.ylim(-25,20)
        plt.axhline(y = 0, ls = '-.', c = 'k', lw = 1)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.ylabel('')
        plt.xlabel('Weight to financial performance', fontsize = 26)
        ax.text(-0.1, 1.15, 'a)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')

    def plot_CSR_gap(self, X, ax):
        a = X.copy()
        b = pd.DataFrame()
        c = pd.DataFrame()

        for i in a.columns:
            b[i] = a[i].apply(lambda x: float(x.split('*')[0]))
            c[i] = a[i].apply(lambda x: len(x.split('*')[1:]))
        c = c.replace([0, 1, 2, 3], ['', '*', '**', '***'])

        z = b.transpose().unstack().reset_index()
        z.columns = ['Challenge', 'W', 'value']    
        c =c.transpose().unstack().reset_index()
        c.columns = ['Challenge', 'W', 'value']   
        


        barplot = sns.barplot(x = 'W', y = 'value', hue = 'Challenge', data = z, 
                              palette = ['#006ab5', '#e4181b'], ax = ax)
        plt.legend(fontsize = 20, loc = 'center left', bbox_to_anchor = (1,0.5), ncol=1)
        count=0
        for p, sig in zip(barplot.patches,c['value']):
            if p.get_height() > 0:
                yp = p.get_height()
            else:
                yp = p.get_height()-.8
            barplot.text(p.get_x() + p.get_width() / 2., yp, 
                         c['value'].iloc[count], ha='center', fontsize = 18)
            p.set_edgecolor('k')
               
            if c['value'].iloc[count] == '':
                p.set_color('gray')  
                p.set_edgecolor('gray')
            count+=1

        plt.ylim(-6,7)
        plt.axhline(y = 0, ls = '-.', c = 'k', lw = 1)
        plt.ylabel(r'$\Delta($Quasioptima,Observed)', fontsize = 26)
        plt.xlabel('Weight to financial performance', fontsize = 26)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0.1, 1.02, 'b)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')
    
    def plot_activity_gap(self, X, ax):
        cols = ['#8f257b', '#002147', '#008AFF', 
                '#00BEFA', '#B5F0E7',
                '#D24000',  '#c2b300',
                '#92d600', '#08CDAE'][::-1]
        a = X.copy()
        b = pd.DataFrame()
        c = pd.DataFrame()
        for i in a.columns:
            b[i] = a[i].apply(lambda x: float(x.split('*')[0]))
            c[i] = a[i].apply(lambda x: len(x.split('*')[1:]))
        c = c.replace([0, 1, 2, 3], ['', '*', '**', '***'])
    
        z = b.transpose().unstack().reset_index()
        z.columns = ['Challenge', 'W', 'value']    
        z['Challenge'] = z['Challenge'].apply(lambda x: x.capitalize())
        c =c.transpose().unstack().reset_index()
        c.columns = ['Challenge', 'W', 'value']   
        c['Challenge'] = c['Challenge'].apply(lambda x: x.capitalize())
        barplot = sns.barplot(x = 'W', y = 'value', hue = 'Challenge', data = z, ax=ax,
                              palette = cols)
        plt.legend(fontsize = 20, loc = 'center left', bbox_to_anchor = (1,0.5), ncol=1)
        count=0
        for p, sig in zip(barplot.patches,c['value']):
            if p.get_height() > 0:
                yp = p.get_height()
            else:
                yp = p.get_height()-1
            barplot.text(p.get_x() + p.get_width() / 2.+0.01, yp, 
                         c['value'].iloc[count], ha='center', fontsize = 10)
            p.set_edgecolor('k')
               
            if c['value'].iloc[count] == '':
                p.set_color('gray')  
                p.set_edgecolor('gray')
            count+=1
    
        plt.ylim(-14,12)
        plt.axhline(y = 0, ls = '-.', c = 'k', lw = 1)
        plt.ylabel('')
        plt.xlabel('Weight to financial performance', fontsize = 26)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.text(-0.1, 1.02, 'c)', transform=ax.transAxes,
          fontsize=24, fontweight='bold', va='top', ha='right')

#%%
if __name__ == '__main__':
    GD = GenerateDataForFigures()
    
    
    #%% Model validation
    GD.make_Figure3(bnr='0_75')

    #%% Model validation out-of-sample
    GD.make_Figure3OOSSI(bnr='0_75')
        
    #%% Randomsiation in time
    GD.random_test_in_time()
    #%% Threshold robustness
    GD.random_placebo_threshold_robustness()
    
    #%%
    GD.validity_further_robustness()
    
    #%%
    GD.plot_random_placebo_tests(bnr='0_75', absolute_values=True)
    plt.savefig('Figures/Figure3A_SI_ABS.pdf', dpi = 300, bbox_inches='tight')
    
    #%% Comparison of the model with linear and Gaussian processes
    GD.plot_out_of_sample_test_lmgp_comparison()

    
    
    #%%
    ######################  
    ######Ruggedness######
    ######################
    GD.plot_ruggedness(save_=True)

    #%%
    #######################  
    ######Performance######
    #######################
    #== Performance analysis
    ##############
    GD.solution_versus_weights()
    ##############

    #%% Plot the global optima as function of weight along side the observed performances
    r, obs_allocations_dict,  opt_allocations_dict = pickle.load(open('fig_data/fig_data_global_opt_'+str(up_to)+'.pckl', 'rb'))    
    r = r.rename(columns = {'weight':'Weight to financial performance'})
    opt_opt, obs_obs = pickle.load(open('Within_distance_'+str(up_to)+'.pckl', 'rb'))

    #%% plot them
    r = r[r.global_opt > r.Expected_performance]
    d = GD.plot_optimal_data(r, opt_opt, obs_obs, up_to, save_=True)
    GD.plot_quasioptimal_performances(r)

    #%% The optimum has more initiatives
    plt.figure(figsize = (20,6))
    ax = plt.subplot(121)
    GD.proportion_of_optimal_initiatives(opt_allocations_dict, obs_allocations_dict, ax = ax)  
    ax = plt.subplot(122)
    GD.plot_random_placebo_tests(bnr='0_75', random_test_name = 'Random test (2x)', allocation_type = 'allocation_2', ax = ax)
    plt.tight_layout()
    plt.savefig('Figures/Figure_random_test_2x_ini.pdf', dpi = 300, bbox_inches='tight')

    
    
    #####################  
    ######Behaviour######
    #####################

    #%% Analysis along the behavioural dimensions
    dpi_=60
    score_perf_table = GD.differentiation_performance_regression().set_index('Weights')
    score_perf_table.index = [0.,0.25,0.5,0.75,1.]
    print(score_perf_table.to_latex())

    #%%
    activity_type, sdg_challenge, emp_mechanism, actDIST, sdgDIST, mecDIST = GD.behavioural_gap()
    #== By CSR type
    sdg_challenge = sdg_challenge.rename(index = {'SDG cons. \& prod.': 'Consumption \& Production',
                                                  'SDG biodiversity': 'Biodiversity',
                                                  'SDG water \& energy': 'Water \& Energy'})
    sdgDIST = sdgDIST.set_index('level_0').rename(index = {'SDG cons. & prod.': 'Consumption & Production',
                                                  'SDG biodiversity': 'Biodiversity',
                                                  'SDG water & energy': 'Water & Energy'}).reset_index()
    print(sdg_challenge.to_latex(escape = True))
    print(emp_mechanism.to_latex(escape = True))
    print(activity_type.to_latex(escape = True))
    
    

    
    #%% Distribution of the gap
    fig, axs = plt.subplots(2, sharex=True, sharey=False, gridspec_kw={'hspace': 0},figsize = (18,16))
    ax1 = plt.subplot(211)
    sns.boxplot(x='Weights', y = 0, hue = 'level_0', data = sdgDIST, showfliers = False, showmeans=True,palette = ['#038c93', '#ff4f61', '#87b8ea'] ); 
    plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5), title = '')
    plt.axhline(y=0, c = 'k', lw=3, ls='-.')
    plt.ylabel('')
    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax2 = plt.subplot(212)
    sns.boxplot(x='Weights', y = 0, hue = 'level_0', data = mecDIST, showfliers = False, showmeans=True,palette = ['#006ab5', '#e4181b'] ); 
    plt.legend(loc = 'center left', bbox_to_anchor = (1,0.5), title = '')
    plt.axhline(y=0, c = 'k', lw=3, ls='-.')
    plt.xlabel('Weight to financial performance', fontsize = 26)
    plt.ylabel(r'$\Delta($Quasiptima,Observed)', fontsize = 26, y=1)
    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    plt.savefig('Figures/Figure6.pdf', dpi = 300, bbox_inches='tight')

    #%%
    #plt.figure(figsize = (20,18))
    fig, axs = plt.subplots(3, sharex=True, sharey=False, gridspec_kw={'hspace': 0},figsize = (18,16))
    ax1 = plt.subplot(311)
    GD.plot_challenges_gap(sdg_challenge, axs[0])
    ax2 = plt.subplot(312)
    GD.plot_CSR_gap(emp_mechanism, axs[1])
    ax3 = plt.subplot(313)
    GD.plot_activity_gap(activity_type, axs[2])
    plt.tight_layout()

    plt.savefig('Figures/Figure6_SI_averages.pdf', dpi = 300, bbox_inches='tight')

    #%% For the SI
    #%%
    ####################    
    #Feature importance#
    ####################
    #%%
    #== Feature importance
    res, dims_action, dims_sdg, dims_action_grouped = GD.feature_importance()
    GD.plot_feature_importance(res, dims_action, dims_sdg, dims_action_grouped, save_=True)
    GD.plot_incidence_of_importanceSI(save_=True)

    
    
    
    
    
    